import requests
from useragent_helper import get_random_ug
from lxml.html import etree


# Base address every survey URL lives under; the prompt shows it so the user
# normally types only the part that follows.
BASE_URL = "https://www.wjx.cn/"

# Seconds to wait for the server. Without a timeout requests.get() may block
# indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

TITLE_XPATH = '''//*[@id="htitle"]/text()'''
DESCRIPTION_XPATH = '''//*[@id="divDesc"]/span/text()'''
QUESTION_DIVS_XPATH = '''/html/body/div[1]/form/div[6]/div[@id="divQuestion"]/fieldset/div'''
QUESTION_LABEL_XPATH = '''./div[@class="field-label"]/text()'''
QUESTION_OPTIONS_XPATH = '''./div[2]/div/div/text()'''


def normalize_url(raw):
    """Turn the user's input into a full survey URL.

    Args:
        raw: Text typed by the user; either the part after ``BASE_URL`` or
            a complete URL that already starts with ``BASE_URL``.

    Returns:
        A ``(url, already_full)`` tuple. ``already_full`` is True when the
        input already started with ``BASE_URL`` and was therefore used
        as-is instead of being prefixed.
    """
    raw = raw.strip()
    # startswith (not a substring test) so that a suffix which merely
    # contains the base URL, e.g. in a query string, is still prefixed.
    if raw.startswith(BASE_URL):
        return raw, True
    return BASE_URL + raw, False


def fetch_page(url):
    """Download *url* and return the response body as text.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    resp = requests.get(
        url,
        headers={'User-agent': get_random_ug()},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail here rather than parsing an error page as if it were the survey.
    resp.raise_for_status()
    return resp.text


def first_text(node, xpath):
    """Return the first result of *xpath* on *node*, stripped, or None."""
    found = node.xpath(xpath)
    return found[0].strip() if found else None


def extract_questions(tree):
    """Collect ``[label, options]`` pairs from the question fieldset.

    ``label`` is the first direct text node of the ``field-label`` div
    (an empty string when there is none) and ``options`` is the list of
    text nodes matched by ``QUESTION_OPTIONS_XPATH``. Divs that yield
    neither a label nor any option are skipped.
    """
    questions = []
    for div in tree.xpath(QUESTION_DIVS_XPATH):
        labels = div.xpath(QUESTION_LABEL_XPATH)
        options = div.xpath(QUESTION_OPTIONS_XPATH)
        if not labels and not options:
            # Nothing printable in this div; indexing labels[0] here used
            # to abort the whole run with IndexError.
            continue
        questions.append([labels[0] if labels else "", options])
    return questions


def main():
    """Prompt for a survey address, scrape it, and print what was found."""
    url, already_full = normalize_url(
        input("请输入需要爬取的问卷星调查网址：\nhttps://www.wjx.cn/")
    )
    if already_full:
        # The user typed the whole address even though the prompt already
        # supplies the prefix; tell them which URL is actually used.
        print("已帮您自动更正网址： " + url)

    print("正在尝试访问网站...", end='')
    try:
        content = fetch_page(url)
    except requests.RequestException as exc:
        print("error")
        # Pause so the message stays visible when run in a console window.
        input("网站访问失败：" + str(exc) + "\n按下enter以退出。")
        raise SystemExit(1)
    print("ok")

    print("正在创建etree对象...", end='')
    tree = etree.HTML(content)
    print("ok")

    print("正在提取标题...", end='')
    title = first_text(tree, TITLE_XPATH)
    if title is None:
        print("error")
        input("标题提取错误，按下enter以继续，关闭窗口以终止。")
        title = " "
    else:
        print("ok")

    print("正在提取试卷说明...", end='')
    # None (rather than an empty list) marks a missing description.
    describe = first_text(tree, DESCRIPTION_XPATH)
    print("ok" if describe is not None else "error")

    print("正在提取题目...", end='')
    questions = extract_questions(tree)
    print("ok")

    input("\n题目已全部提取完毕，按下回车以显示题目。\n")
    print(title)
    if describe is not None:
        print(describe)
    for question in questions:
        # Prints the label, then the option list on the following line.
        for part in question:
            print(part)


if __name__ == "__main__":
    main()
